Read in the splicing results returned by MAJIQ and make a volcano plot, only highlight genes of interest with a label.
ipsc_splicing = fread(file.path(here::here(),"data","ipsc_splicing_results.csv"))
ipsc_de = fread(file.path(here::here(),"data","ipsc_differential_expression.csv"))
splicing_dots_tables <- ipsc_splicing %>%
mutate(junction_name = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2") &
p_d_psi_0_10_per_lsv_junction > 0.9 &
deltaPSI > 0 ~ gene_name,
T ~ "")) %>%
mutate(`Novel Junction` = de_novo_junctions == 0) %>%
mutate(log10_test_stat = -log10(1 - p_d_psi_0_10_per_lsv_junction)) %>%
mutate(log10_test_stat = ifelse(is.infinite(log10_test_stat), 16, log10_test_stat)) %>%
mutate(graph_alpha = ifelse(p_d_psi_0_10_per_lsv_junction > 0.9, 1, 0.2)) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2") &
p_d_psi_0_10_per_lsv_junction > 0.9 &
deltaPSI > 0 ~ junction_name,
T ~ ""))
fig1a = ggplot() +
geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions != 0),
aes(x = deltaPSI, y =log10_test_stat,
alpha = graph_alpha,,fill = "Annotated Junction"), pch = 21, size = 4) +
geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions == 0),
aes(x = deltaPSI, y =log10_test_stat,
alpha = graph_alpha,fill = "Novel Junction"), pch = 21, size = 4) +
geom_text_repel(data = splicing_dots_tables[junction_name != ""],
aes(x = deltaPSI, y =log10_test_stat,
label = label_junction,
color = as.character(de_novo_junctions)), point.padding = 0.3,
nudge_y = 0.2,
min.segment.length = 0,
box.padding = 2,
size=6,show.legend = F) +
geom_hline(yintercept = -log10(1 - .9)) +
scale_fill_manual(name = "",
breaks = c("Annotated Junction", "Novel Junction"),
values = c("Annotated Junction" = "#648FFF", "Novel Junction" = "#fe6101") ) +
scale_color_manual(name = "",
breaks = c("0", "1"),
values = c("1" = "#648FFF", "0" = "#fe6101") ) +
guides(alpha = FALSE, size = FALSE) +
theme(legend.position = 'top') +
ggpubr::theme_pubr() +
xlab("delta PSI") +
ylab(expression(paste("-Lo", g[10], " Test Statistic"))) +
theme(text = element_text(size = 24)) +
theme(legend.text=element_text(size=22)) +
xlim(-1,1)
de_table = ipsc_de %>%
mutate(contains_cryptic = gene_name %in% splicing_dots_tables[cryptic_junction == T,unique(gene_name)]) %>%
mutate(contains_cryptic = as.character(as.numeric(contains_cryptic))) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2","TARDBP") ~ gene_name,
T ~ "")) %>%
mutate(graph_alpha = ifelse(padj < 0.1, 1, 0.2)) %>%
mutate(y_data = -log10(padj))
fig2a = ggplot(data = de_table) +
geom_point(data = de_table %>% filter(contains_cryptic == "0"),
aes(x = log2FoldChange, y = -log10(padj),
alpha = graph_alpha,fill = "No Cryptic"), pch = 21, size = 4) +
geom_point(data = de_table %>% filter(contains_cryptic == "1"),
aes(x = log2FoldChange, y = -log10(padj),
alpha = graph_alpha,fill = "Contains Cryptic"), pch = 21, size = 4) +
geom_text_repel(data = de_table[label_junction != ""],max.overlaps = 500,
aes(x = log2FoldChange,
y = -log10(padj),
label = label_junction,
color = as.character(contains_cryptic)),
nudge_y = 5,
min.segment.length = 0,
box.padding = 4,
size=6,show.legend = F) +
scale_fill_manual(name = "",
breaks = c("No Cryptic", "Contains Cryptic"),
values = c("No Cryptic" = "#648FFF", "Contains Cryptic" = "#fe6101") ) +
scale_color_manual(name = "",
breaks = c("1", "0"),
values = c("1" = "#fe6101", "0" = "#648FFF") ) +
xlim(-7.5,7.5) +
ylab(expression(paste("-Lo", g[10], " P-value"))) +
guides(alpha = FALSE, size = FALSE) +
theme(legend.position = 'top') +
ggpubr::theme_pubr() +
xlab(expression(paste("Lo", g[2], " Fold Change"))) +
theme(text = element_text(size = 24)) +
theme(legend.text=element_text(size=22)) +
geom_hline(yintercept = -log10(0.1)) +
geom_vline(xintercept=c(0), linetype="dotted")
We have 2 PSI’s for the UNC13A cryptic, one which includes both the short and long form of the cryptic, and one which does not. While we’re not mentioning in figure 1, given that the longer form of the cryptic appears in control cerebellum, one could argue that that junction should not be included in the PSI calculation here for UNC13A cryptic per se. In practice, this makes very little difference in the conclusions made on the cell lines, so I’ve included both for completeness.
The ‘C/G’ tells which genotypes were supported by RNA-seq on rs12973192. The NB cell lines are het, as is the WTC11 cell line. SH-SY5Y cells are homozygote for the major allele. There was variability on the Klim hMN set on allelic expression.
rel_rna_cryptic_amount = data.table::fread(file.path(here::here(),"data","kd_experiments_relative_rna_and_unc13a_cryptic_junction_counts.csv"))
rel_rna_cryptic_amount[,cryptic_psi_full := ( UNC13A_3prime +
UNC13A_5prime + UNC13A_5prime_2 +
UNC13A_5prime_3) / (UNC13A_annotated + UNC13A_3prime +
UNC13A_5prime + UNC13A_5prime_2 +
UNC13A_5prime_3)]
####barplot UNC13A CE PSI - full five####
unc13a_cryptic_full = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "cryptic_psi_full",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(name = "",
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(name = "",
values = c("#1C2617","#262114")
) +
ylab("UNC13A CE PSI") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
####barplot UNC13B NMD PSI####
unc13b_psi_nmd = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "unc13b_nmd_exon_psi",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(name = "",
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(name = "",
values = c("#1C2617","#262114")
) +
ylab("UNC13B \nNMD Exon PSI") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = cryptic_psi_full, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = cryptic_psi_full),size = 12) +
geom_smooth(aes(x = TARDBP, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A CE PSI") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom')
## `geom_smooth()` using formula 'y ~ x'
####UNC13A RNA and UNC13A Cryptic####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = UNC13A, y = cryptic_psi_full, fill = source),pch = 21,size = 6) +
stat_cor(aes(x = UNC13A, y = cryptic_psi_full),size = 12) +
geom_smooth(aes(x = UNC13A, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A CE PSI") +
expand_limits(y = 1) +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom') +
theme(text = element_text(size = 18,family = 'sans'),
legend.text = element_text(size = 20,family = 'sans'),
axis.title = element_text(size = 32),
axis.text = element_text(size = 32))
## `geom_smooth()` using formula 'y ~ x'
####UNC13A RNA and UNC13A Cryptic####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = cryptic_psi_full, fill = source),pch = 21,size = 6) +
stat_cor(aes(x = TARDBP, y = cryptic_psi_full),size = 12) +
geom_smooth(aes(x = TARDBP, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A CE PSI") +
expand_limits(y = 1) +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom') +
theme(text = element_text(size = 18,family = 'sans'),
legend.text = element_text(size = 20,family = 'sans'),
axis.title = element_text(size = 32),
axis.text = element_text(size = 32))
## `geom_smooth()` using formula 'y ~ x'
####UNC13A RNA and UNC13A IR####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = UNC13A, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = UNC13A, y = normalized_unc13a_ir),size = 12) +
geom_smooth(aes(x = UNC13A, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A Normalized IR") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom')
## `geom_smooth()` using formula 'y ~ x'
####TARDBP RNA and UNC13A IR####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = normalized_unc13a_ir),size = 12) +
geom_smooth(aes(x = TARDBP, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A Normalized IR") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom')
## `geom_smooth()` using formula 'y ~ x'
unca = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "cryptic_psi_full",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("UNC13A CE PSI") +
xlab("") +
guides(color = FALSE)
stmn2 = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "stmn_2_cryptic_psi",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("STMN2 Cryptic PSI") +
xlab("") +
guides(color = FALSE)
####barplot UNC13B normalized IR####
unc13b_ir = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "normalized_unc13b_ir",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Normalized UNC13B \nIntron Retention Ratio") +
xlab("") +
guides(color = FALSE) +
theme(legend.position = "none") +
theme(text = element_text(size = 18)) +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
####barplot UNC13A normalized IR####
unc13a_ir = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "normalized_unc13a_ir",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Normalized UNC13A \nIntron Retention Ratio") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
ggarrange(unc13a_cryptic_full,unc13b_psi_nmd,unc13a_ir,unc13b_ir,common.legend = T)
####scatterplot stmn2 normalized TARDBP####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = stmn_2_cryptic_psi, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = stmn_2_cryptic_psi)) +
theme(legend.position = 'bottom') +
facet_wrap(~source)
####scatterplot UNC13BIR normalized TARDBP####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')
####scatterplot UNC13AIR and UNC13A Crptic####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = cryptic_psi_full, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = cryptic_psi_full, y = normalized_unc13a_ir)) +
theme(legend.position = 'bottom')
####scatterplot UNC13BIR and UNC13A Crptic####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = cryptic_psi_full, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = cryptic_psi_full, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')+
facet_wrap(~source)
####scatterplot UNC13BIR and UNC13B NMD####
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')+
facet_wrap(~source)
####barplot UNC13A RNA level####
First let’s look at only including tissues with detectable stmn2 or UNC13A CE
clean_data_table = fread(file.path(here::here(),"data","nygc_junction_information.csv"))
clean_data_table = clean_data_table %>%
mutate(call = fct_relevel(call,
"C/C", "C/G", "G/G")) %>%
mutate(number_g_alleles = as.numeric(call) - 1) %>%
mutate(unc13a_cryptic_leaf_psi = ifelse(is.na(unc13a_cryptic_leaf_psi),0,unc13a_cryptic_leaf_psi))
print(glue::glue("Number of unique patients: {clean_data_table[,length(unique(participant_id))]}"))
## Number of unique patients: 377
print(glue::glue("Number of unique tissue samples: {clean_data_table[,length(unique(sample))]}"))
## Number of unique tissue samples: 1349
print("Patients Per Disease Category")
## [1] "Patients Per Disease Category"
clean_data_table[,length(unique(participant_id)),by = disease]
## disease V1
## 1: ALS-FTD 23
## 2: ALS 193
## 3: Control 77
## 4: Other 11
## 5: FTD 61
## 6: ALS-AD 12
print("Tissues Per Disease Category")
## [1] "Tissues Per Disease Category"
clean_data_table[,length(unique(sample)),by = disease]
## disease V1
## 1: ALS-FTD 110
## 2: ALS 764
## 3: Control 199
## 4: Other 70
## 5: FTD 138
## 6: ALS-AD 68
print("Number of patients per rs12973192 genotype")
## [1] "Number of patients per rs12973192 genotype"
clean_data_table[,length(unique(participant_id)),by = call]
## call V1
## 1: C/C 166
## 2: G/G 58
## 3: C/G 153
print("Number of tissues per disease")
## [1] "Number of tissues per disease"
clean_data_table[,.N,by = c("disease","tissue_clean")]
## disease tissue_clean N
## 1: ALS-FTD Frontal_Cortex 22
## 2: ALS Frontal_Cortex 132
## 3: Control Frontal_Cortex 40
## 4: Other Frontal_Cortex 11
## 5: ALS-FTD Lumbar_Spinal_Cord 15
## 6: ALS Lumbar_Spinal_Cord 105
## 7: Control Lumbar_Spinal_Cord 33
## 8: Other Lumbar_Spinal_Cord 9
## 9: ALS-FTD Cervical_Spinal_Cord 14
## 10: ALS Cervical_Spinal_Cord 103
## 11: Control Cervical_Spinal_Cord 32
## 12: Other Cervical_Spinal_Cord 10
## 13: ALS-FTD Motor_Cortex 28
## 14: ALS Motor_Cortex 175
## 15: Control Motor_Cortex 23
## 16: Other Motor_Cortex 16
## 17: ALS-FTD Cerebellum 13
## 18: ALS Cerebellum 129
## 19: Control Cerebellum 28
## 20: Other Cerebellum 8
## 21: FTD Cerebellum 58
## 22: FTD Frontal_Cortex 45
## 23: ALS-AD Cerebellum 11
## 24: ALS-AD Motor_Cortex 13
## 25: ALS-AD Cervical_Spinal_Cord 10
## 26: ALS-AD Lumbar_Spinal_Cord 11
## 27: ALS-AD Frontal_Cortex 12
## 28: ALS-AD Occipital_Cortex 7
## 29: ALS-AD Thoracic_Spinal_Cord 4
## 30: ALS Occipital_Cortex 37
## 31: ALS Thoracic_Spinal_Cord 33
## 32: ALS-FTD Occipital_Cortex 6
## 33: Control Temporal_Cortex 23
## 34: ALS Temporal_Cortex 23
## 35: FTD Temporal_Cortex 35
## 36: Other Occipital_Cortex 7
## 37: Other Thoracic_Spinal_Cord 6
## 38: Control Thoracic_Spinal_Cord 5
## 39: Control Occipital_Cortex 5
## 40: ALS-FTD Thoracic_Spinal_Cord 5
## 41: ALS Hippocampus 27
## 42: ALS-FTD Hippocampus 7
## 43: Other Hippocampus 3
## 44: Control Hippocampus 10
## disease tissue_clean N
print("Number of partcipants by mutation and disease")
## [1] "Number of partcipants by mutation and disease"
clean_data_table[,length(unique(participant_id)),by = c("mutations","disease")]
## mutations disease V1
## 1: None ALS-FTD 13
## 2: None ALS 145
## 3: C9orf72 ALS-FTD 10
## 4: None Control 77
## 5: None Other 11
## 6: SOD1 ALS 8
## 7: OPTN ALS 4
## 8: C9orf72 ALS 32
## 9: MATR3 ALS 1
## 10: ANG ALS 1
## 11: C9orf72 FTD 12
## 12: None ALS-AD 11
## 13: None FTD 42
## 14: C9orf72 ALS-AD 1
## 15: TBK1 FTD 2
## 16: MAPT FTD 5
## 17: FUS ALS 2
print(glue::glue("Number of patients per pathology:"))
## Number of patients per pathology:
clean_data_table[,length(unique(participant_id)),by = .(pathology)]
## pathology V1
## 1: ALS-FTD 23
## 2: ALS 193
## 3: control 77
## 4: Other 11
## 5: 13
## 6: ALS-AD 12
## 7: FTD-TDP-A 24
## 8: FTD-TDP-B 3
## 9: FTD-TDP-C 9
## 10: FTD-TAU 7
## 11: FTD-FUS 5
FTLD-non-TDP are those with TAU and FUS aggregates
Non-tdp ALS are those with SOD1 or FUS mutations. The n’s are quite low on this unfortunately, only 8 ALS with SOD1 and 2 with FUS mutations.
First we look at detection rate in tissues affected by TDP-43 proteinopathy, For FTLD this is frontal and temporal Cortices, and for ALS this is lumbar, cervical, and thoracic spinal cord samples as well as motor cortex. For controls we also take all 6 tissues, frontal,temporal,motor cortices and the lumbar, cervical, and thoracic spinal cords.
(As a side note, once we do this the number of ALS-non-TDP drops down to 6 (2 FUS) because the ALS sample tissues are not balanced and not every participant has samples in every tissue)
####Inclusion reads by if TDP-potential####
boxplot_table = clean_data_table %>%
mutate(across(UNC13A_3prime_leaf:UNC13A_annotated_leaf, ~ .x / library_size,.names = "{.col}_library_norm")) %>%
filter(!tissue_clean %in% c("Choroid","Liver")) %>%
dplyr::select(sample,participant_id,mutations,disease_group2,pathology,tissue_clean,contains("_library_norm")) %>%
melt() %>%
filter(grepl("_3prime|_5prime_1",variable)) %>%
group_by(sample) %>%
mutate(inclusion_reads = sum(value)) %>%
ungroup() %>%
unique() %>%
mutate(junction_name = case_when(variable == "UNC13A_3prime_leaf_library_norm" ~ " Novel Donor",
variable == "UNC13A_5prime_1_leaf_library_norm" ~ " Short Novel Acceptor",
variable == "UNC13A_5prime_2_leaf_library_norm"~ "Long Novel Acceptor")) %>%
mutate(disease_tissue = case_when((grepl("FTLD",disease_group2) & grepl("Cortex",tissue_clean)) ~ T,
(grepl("ALS",disease_group2) & grepl("Cord|Motor",tissue_clean)) ~ T,
(grepl("Occipital",tissue_clean)) ~ F,
(grepl("Control",disease_group2) & grepl("Cord|Cortex",tissue_clean)) ~ T,
TRUE ~ F)) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean))
melt_count = clean_data_table[,.(sample,UNC13A_3prime_leaf,UNC13A_5prime_1_leaf,UNC13A_5prime_2_leaf)] %>% data.table::melt() %>% setnames(.,"value","orig_count")
Looking at disease tissue only, so just taking the cords in ALS and the frontal and temporal cortex of FTLD and then the cord and cortices in Controls.
boxplot_table %>%
filter(disease_tissue == T) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(participant_id,disease_group2,detected) %>%
unique() %>%
group_by(disease_group2) %>%
mutate(n_sample = n_distinct(participant_id)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate)) +
ggpubr::theme_pubr() +
scale_y_continuous(lim = c(0,1),labels = scales::percent) +
ylab("Percent of Patients \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 24)) +
xlab("N individuals")
boxplot_table %>%
filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(sample,disease_group2,detected,tissue_clean) %>%
unique() %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
ungroup() %>%
filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Motor Cortex",
"Thoracic Spinal Cord")) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate,fill = tissue_clean),position = position_dodge2(width = 0.8, preserve = "single")) +
ggpubr::theme_pubr() +
ylab("Percent of Tissues \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N tissues") +
facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) +
theme(axis.text.x = element_text(size = 14)) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
scale_fill_manual(values = c("#4C97B5","#C87156","#4C97B5","#C87156","#4C97B5","#C87156")) +
theme(legend.position = "none")
boxplot_table %>%
left_join(clean_data_table %>% dplyr::select(sample, call)) %>%
unique() %>%
filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(sample,disease_group2,detected,tissue_clean,call) %>%
unique() %>%
group_by(disease_group2,tissue_clean,call) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
ungroup() %>%
filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Motor Cortex",
"Thoracic Spinal Cord")) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate,fill = call),position = position_dodge2(width = 0.8, preserve = "single")) +
ggpubr::theme_pubr() +
ylab("Percent of Tissues \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N tissues") +
facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) +
theme(axis.text.x = element_text(size = 14)) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "none")
## Joining, by = "sample"
## Adding missing grouping variables: `call`
boxplot_table %>%
filter(tissue_clean %in% c("Frontal Cortex", "Lumbar Spinal Cord", "Cervical Spinal Cord","Motor Cortex", "Temporal Cortex", "Cerebellum") )%>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex",
"Motor Cortex")) %>%
mutate(disease_group2 = fct_relevel(disease_group2,"Control","ALS \n non-TDP","ALS-TDP")) %>%
dplyr::select(disease_group2,
tissue_clean,
junction_name,
value) %>%
unique() %>%
ggplot(aes(x = disease_group2,
y = value * 10^6,
fill = junction_name)) +
geom_boxplot(show.legend = F,position = position_dodge(preserve = "single",width = 1)) +
geom_point(pch = 21, position = position_jitterdodge(dodge.width=1,jitter.width = 0.3))+
scale_y_log10() +
ggplot2::facet_wrap(vars(tissue_clean),scales = "free_x",nrow = 3) +
ylab("UNC13A cryptic \n reads per million") +
theme(text = element_text(size = 12)) +
xlab("") +
scale_fill_manual(values = colorblind_pal()(4)[2:4])
disease_comparisons = list( c("Control","ALS-TDP"),
c("Control","ALS \n non-TDP"),
c("Control","FTLD-TDP"),
c("Control","FTLD \n non-TDP" ))
table_to_test = boxplot_table %>%
filter(disease_tissue == T) %>%
group_by(disease_group2) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
dplyr::select(detection_name,
participant_id,
inclusion_reads,
disease_group2,
tissue_clean,
disease_tissue) %>%
unique()
test_pair = pairwise.wilcox.test(table_to_test$inclusion_reads, table_to_test$detection_name,
p.adjust.method = "BH") %>% broom::tidy()
test_pair = test_pair %>%
mutate(p_value_draw = case_when(p.value < 0.0001~ "***",
p.value < 0.01 ~ "**",
p.value < 0.05 ~ "*",
TRUE ~ paste0("Adj. p-value \n",as.character(round(p.value,2))))) %>%
mutate(y.position = seq(0.25,by = 0.1,length.out = 7))
table_to_test %>%
ggplot(aes(x = detection_name, y = inclusion_reads * 10^6)) +
geom_boxplot() +
geom_jitter(height = 0) +
scale_y_log10() +
ggpubr::theme_pubr() +
ylab("UNC13A cryptic inclusion \n reads per million") +
theme(text = element_text(size = 24)) +
xlab("N samples") +
stat_pvalue_manual(test_pair %>% filter(p.value < 0.05),
label = "p_value_draw",size = 8) +
stat_compare_means(size = 8)
boxplot_table %>%
filter(junction_name %in% c(" Novel Donor"," Short Novel Acceptor" )) %>%
group_by(sample) %>%
summarise(new_inc = sum(value),
disease_group2 = disease_group2,
tissue_clean = tissue_clean,
sample = sample) %>%
ungroup() %>%
unique() %>%
filter(!(tissue_clean %in% c("Hippocampus","Occipital Cortex","Cerebellum"))) %>%
ggplot(aes(x = disease_group2, y = new_inc * 10^6)) +
geom_boxplot() +
geom_jitter(height = 0) +
scale_y_log10() +
facet_wrap(~tissue_clean) +
ggpubr::theme_pubr() +
ylab("UNC13A cryptic inclusion \n reads per million") +
theme(text = element_text(size = 24)) +
xlab("N samples")
## `summarise()` has grouped output by 'sample'. You can override using the `.groups` argument.
####ALS reads per tissue####
boxplot_table %>%
filter(disease_group2 %in% c("Control",
"ALS \n non-TDP",
"ALS-TDP")) %>%
filter(tissue_clean %in% c("Motor Cortex","Cervical Spinal Cord","Lumbar Spinal Cord")) %>%
dplyr::select(disease_group2,
tissue_clean,
junction_name,
value) %>%
unique() %>%
mutate(disease_group2 = fct_relevel(disease_group2,"Control","ALS \n non-TDP","ALS-TDP")) %>%
mutate(tissue_clean = fct_relevel(tissue_clean,"Motor Cortex")) %>%
mutate(y_val = value * 10^6) %>%
ggbarplot( x = "disease_group2",
y = "y_val",
color = 'junction_name',
fill = 'junction_name',
position = position_dodge(0.8),
add = c("mean_se","jitter"),
facet.by = c("junction_name","tissue_clean"))+
scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
scale_color_manual(
values = c("#1C2617","#262114","#262115")
) +
theme(axis.text.x = element_text(size = 24)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
ylab("UNC13A cryptic \n reads per million") +
xlab("") +
theme(legend.position = 'none') +
theme(legend.title = element_blank()) +
ylim(0,0.2) +
theme(
strip.background.y = element_blank(),
strip.text.y = element_blank()
) +
guides(color = FALSE)
####FTLD reads per tissue####
boxplot_table %>%
filter(tissue_clean %in% c("Cerebellum","Frontal Cortex","Temporal Cortex")) %>%
dplyr::select(disease_group2,
tissue_clean,
junction_name,
value) %>%
unique() %>%
mutate(y_val = value * 10^6) %>%
mutate(tissue_clean = fct_relevel(tissue_clean,"Cerebellum",after = Inf)) %>%
mutate(disease_group2 = fct_relevel(disease_group2,"Control","ALS \n non-TDP","ALS-TDP")) %>%
ggbarplot( x = "disease_group2",
y = "y_val",
color = 'junction_name',
fill = 'junction_name',
position = position_dodge(0.8),
add = c("mean_se","jitter"),
facet.by = c("junction_name","tissue_clean")) +
scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
scale_color_manual(
values = c("#1C2617","#262114","#262115")
) +
theme(axis.text.x = element_text(size = 24)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
ylab("UNC13A cryptic \n reads per million") +
xlab("") +
theme(legend.position = 'top') +
theme(legend.title = element_blank()) +
ylim(0,0.2) +
theme(
strip.background.y = element_blank(),
strip.text.y = element_blank()
) +
scale_x_discrete(guide = guide_axis(n.dodge=2)) +
guides(color = FALSE)
Separated by tissue only in ALS/FTLD-TDP
clean_data_table %>%
filter(UNC13A_annotated_leaf > 10) %>%
mutate(unc13_d = unc13a_cryptic_leaf_psi_full > 0) %>%
filter(disease_tissue == T) %>%
ggbarplot(,
x = "disease_group2",
add = c("mean_se","jitter"),
y = "UNC13A_TPM",
fill = 'unc13_d',
color = 'unc13_d',
facet.by = 'tissue_clean',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(name = "",
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(name = "",
values = c("#1C2617","#262114")
) +
ylab("UNC13A TPM") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20)) +
facet_wrap(~tissue_clean, scales = 'free') +
ggtitle("UNC13A Cryptic Detected") +
stat_compare_means()
Separated by tissue only in ALS/FTLD-TDP
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A TPM") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
facet_wrap(~tissue_clean,scales = "free_y")
## `geom_smooth()` using formula 'y ~ x'
No Tissue separation
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A TPM") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
ylim(0,50)
## `geom_smooth()` using formula 'y ~ x'
#tissue separated
clean_data_table %>%
filter(unc13a_cryptic_leaf_psi_full > 0 ) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = unc13a_cryptic_leaf_psi_full, y = UNC13A_TPM, color = disease_group2)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
ylab("UNC13A TPM") +
xlab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
facet_wrap(~tissue_clean,scales = 'free')
## `geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi_full > 0) %>%
ggplot(aes(x = unc13a_cryptic_leaf_psi_full, y = UNC13A_TPM)) +
geom_point() +
stat_cor(label.y = 38,size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
ylab("UNC13A TPM") +
xlab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 22)) +
ylim(0,40)
## `geom_smooth()` using formula 'y ~ x'
####scatter plot showing the correlation in non-log space in STMN2 and cryptic PSI####
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
geom_point() +
stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18))
## `geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,call,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi_full) %>%
mutate(unc13a_normalized_psi_to_stmn2 = log2((unc13a_cryptic_leaf_psi_full +1)/ (stmn_2_cryptic_psi_leaf + 1))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(tissue_clean) %>%
mutate(psi_mean = median(unc13a_normalized_psi_to_stmn2)) %>%
ungroup() %>%
mutate(tissue_clean = fct_reorder(tissue_clean,-psi_mean)) %>%
ggbarplot( x = "tissue_clean",
y = "unc13a_normalized_psi_to_stmn2",
position = position_dodge(0.8),
add = c("mean_se","jitter"),
facet.by = 'disease_group2') +
ylab("Log2Fold Ratio \n UNC13A Cryptic to STMN2 Cryptic") +
xlab("") +
theme(text = element_text(size = 24)) +
theme(legend.position = 'bottom') +
facet_wrap(~disease_group2, scales = 'free_x')
####boxplot of full fix cryptic psi in als-tdp and ftld-tdp by genotype and tissue####
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(unc13a_cryptic_leaf_psi_full > 0 ) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,call,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
ggboxplot( x = "tissue_clean",
y = "unc13a_cryptic_leaf_psi",
fill = "call",
facet.by = 'disease_group2') +
xlab("") +
geom_jitter(height = 0) +
theme(text = element_text(size = 24)) +
theme(legend.position = 'bottom') +
facet_wrap(~disease_group2, scales = 'free')
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,call,STMN2_TPM,UNC13A_TPM) %>%
mutate(unc13a_normalized_psi_to_stmn2 = log2((UNC13A_TPM +1)/ (STMN2_TPM + 1))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(tissue_clean) %>%
mutate(psi_mean = median(unc13a_normalized_psi_to_stmn2)) %>%
ungroup() %>%
mutate(tissue_clean = fct_reorder(tissue_clean,-psi_mean)) %>%
ggbarplot( x = "tissue_clean",
y = "unc13a_normalized_psi_to_stmn2",
position = position_dodge(0.8),
add = c("mean_se","jitter"),
facet.by = 'disease_group2') +
ylab("Log2Fold Ratio \n UNC13A Cryptic to STMN2 Cryptic") +
xlab("") +
theme(text = element_text(size = 24)) +
theme(legend.position = 'bottom') +
facet_wrap(~disease_group2, scales = 'free_x')
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(participant_id, sample,tissue_clean,disease_group2,call,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi_full) %>% add_count(participant_id) %>%
filter(n == 5) %>%
dplyr::select(-n) %>%
data.table::melt() %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
ggplot(aes(x = tissue_clean, y = value, color = participant_id)) +
geom_point() +
geom_line(aes(group = participant_id)) +
theme(text = element_text(size = 24)) +
theme(legend.position = 'none') +
facet_grid(~variable)
scale_color_manual(values = c("#D55E00","#0072B2"))
## <ggproto object: Class ScaleDiscrete, Scale, gg>
## aesthetics: colour
## axis_order: function
## break_info: function
## break_positions: function
## breaks: waiver
## call: call
## clone: function
## dimension: function
## drop: TRUE
## expand: waiver
## get_breaks: function
## get_breaks_minor: function
## get_labels: function
## get_limits: function
## guide: legend
## is_discrete: function
## is_empty: function
## labels: waiver
## limits: NULL
## make_sec_title: function
## make_title: function
## map: function
## map_df: function
## n.breaks.cache: NULL
## na.translate: TRUE
## na.value: NA
## name: waiver
## palette: function
## palette.cache: NULL
## position: left
## range: <ggproto object: Class RangeDiscrete, Range, gg>
## range: NULL
## reset: function
## train: function
## super: <ggproto object: Class RangeDiscrete, Range, gg>
## rescale: function
## reset: function
## scale_name: manual
## train: function
## train_df: function
## transform: function
## transform_df: function
## super: <ggproto object: Class ScaleDiscrete, Scale, gg>
tissue_to_show_correlation = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(call = fct_relevel(call,
"C/C", "C/G", "G/G")) %>%
filter(!grepl("Cord",tissue_clean)) %>%
mutate(log10_stmn2 = log10(stmn_2_cryptic_psi_leaf)) %>%
mutate(log10_unc13a = log10(unc13a_cryptic_leaf_psi)) %>%
filter(is.finite(log10_unc13a) & is.finite(log10_stmn2))
tissue_to_show_correlation %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(call = fct_relevel(call,
"C/C", "C/G", "G/G")) %>%
filter(!grepl("Cord",tissue_clean)) %>%
mutate(log10_stmn2 = log10(stmn_2_cryptic_psi_leaf)) %>%
mutate(log10_unc13a = log10(unc13a_cryptic_leaf_psi)) %>%
ggpubr::ggscatter(.,
x = "log10_stmn2",
y = "log10_unc13a",
color = 'call',
add = "reg.line", size = 3
) +
ylab("UNC13A CE PSI ") +
xlab("STMN2 Cryptic PSI ") +
stat_cor(aes( x = stmn_2_cryptic_psi_leaf,
y = unc13a_cryptic_leaf_psi,
color = call),
method = 'spearman',
cor.coef.name = 'rho',
show.legend = FALSE,
size = 14) +
scale_color_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
theme(legend.title = element_blank()) +
theme(text = element_text(size = 24),legend.text = element_text(size = 32))
## `geom_smooth()` using formula 'y ~ x'
overall_fisher = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(unc13a_detected = unc13a_cryptic_leaf_psi > 0) %>%
dplyr::select(participant_id,unc13a_detected,call) %>%
unique() %>%
group_by(call) %>%
mutate(n_sample = n_distinct(participant_id)) %>%
mutate(n_sample_detected = sum(unc13a_detected)) %>%
dplyr::select(call,n_sample,n_sample_detected) %>%
unique() %>%
mutate(n_non_detected = n_sample - n_sample_detected) %>%
dplyr::select(-n_sample)
over_p = overall_fisher %>%
column_to_rownames('call') %>%
chisq.test() %>%
broom::tidy() %>%
.$p.value
overall_fisher %>%
mutate(n_sample = n_non_detected + n_sample_detected) %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(detection_name = glue::glue("{call} \n ( {n_sample} )")) %>%
ggplot(aes(x = detection_name, y = detection_rate, fill = detection_name)) +
geom_col(show.legend = F) +
ggpubr::theme_pubr() +
scale_y_continuous(labels = scales::percent) +
ylab("% of TDP-43 Proteionopathy Patients \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N individuals") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739"))
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(unc13a_detected = unc13a_cryptic_leaf_psi > 0) %>%
dplyr::select(sample,unc13a_detected,call,tissue_clean) %>%
unique() %>%
group_by(call,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(n_sample_detected = sum(unc13a_detected)) %>%
dplyr::select(call,n_sample,n_sample_detected) %>%
unique() %>%
mutate(n_non_detected = n_sample - n_sample_detected) %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
ggplot(aes(x = tissue_clean, y = detection_rate, fill = call)) +
geom_col(show.legend = F,position = position_dodge2()) +
geom_text(aes(label = n_sample,y = 0),vjust = 1,size = 11,position = position_dodge(width = 1)) +
ggpubr::theme_pubr() +
scale_y_continuous(labels = scales::percent) +
ylab("% of Tissue samples \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N individuals") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739"))
## Adding missing grouping variables: `tissue_clean`
over_p = overall_fisher %>%
column_to_rownames('call') %>%
chisq.test() %>%
broom::tidy() %>%
.$p.value
overall_fisher %>%
mutate(n_sample = n_non_detected + n_sample_detected) %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(detection_name = glue::glue("{call} \n ( {n_sample} )")) %>%
ggplot(aes(x = detection_name, y = detection_rate, fill = detection_name)) +
geom_col(show.legend = F) +
ggpubr::theme_pubr() +
scale_y_continuous(labels = scales::percent) +
ylab("% of TDP-43 Proteionopathy Patients \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N individuals") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739"))
Although this difference is not significant, with the Fisher’s exact giving a p-value of 0.28.
Only looking at disease relevant tissue and in patients we see that the detection rate depends on both the level of TPD-43 pathology present, as measured by STMN2 CE expression,
detection_table = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(call = fct_relevel(call,
"C/C", "C/G", "G/G")) %>%
mutate(stmn2_psi_groups = as.numeric(cut_interval(log10(stmn_2_cryptic_psi), n = 2))) %>%
mutate(stmn2_psi_groups = ifelse(is.na(stmn2_psi_groups)," No STMN2",stmn2_psi_groups)) %>%
mutate(stmn2_psi_groups = case_when(stmn2_psi_groups == 1 ~ " Low STMN2",
stmn2_psi_groups == 2 ~ "High STMN2",
TRUE ~ stmn2_psi_groups)) %>%
mutate(unc13a_detected = unc13a_cryptic_leaf_psi > 0) %>%
group_by(call,unc13a_detected,stmn2_psi_groups) %>%
add_count(name = "genotype_detected") %>%
dplyr::select(call,unc13a_detected,genotype_detected,stmn2_psi_groups) %>%
unique() %>%
pivot_wider(names_from = "unc13a_detected",
values_from = "genotype_detected") %>%
dplyr::rename(unc_not_detected = `FALSE`, unc_cryptic_detected = `TRUE`) %>%
mutate(total_tissue = (unc_not_detected + unc_cryptic_detected)) %>%
mutate(detection_rate = unc_cryptic_detected / total_tissue) %>%
ungroup() %>%
mutate(stmn2_psi_groups = fct_relevel(stmn2_psi_groups, "No STMN2", after = 0)) %>%
mutate(detection_name = glue::glue("{call} \n ( {total_tissue} )"))
detection_table %>%
mutate(error = sqrt((detection_rate * (1-detection_rate))/total_tissue)) %>%
ggplot(aes(x = call, y = detection_rate,fill = call)) +
geom_col(show.legend = F,position = 'dodge2') +
geom_errorbar(aes(ymin = detection_rate - error, ymax = detection_rate + error),
position = position_dodge(0.9),
width=0.2) +
facet_wrap(~stmn2_psi_groups) +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
scale_y_continuous(lim = c(0,0.5),labels = scales::percent) +
ylab("Percent of Samples Detected") +
ggpubr::theme_pubr() +
geom_text(aes(label = total_tissue,y = 0),vjust = 1,size = 11) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 32)) +
xlab("N Tissues")
UNC13A CE PSI
genotype_comparisons = list(c("C/C", "C/G"), c("C/C", "G/G"), c("C/G", "G/G"))
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(call = fct_relevel(call,
"C/C", "C/G", "G/G")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0) %>%
ggbarplot( x = "call",
y = "unc13a_cryptic_leaf_psi",
color = 'call',
position = position_dodge(0.8),
add = c("mean_se","jitter")) +
ylab("UNC13A CE PSI") +
scale_color_manual(values = c("#88CCEE","#44AA99","#105e2a")) +
theme(text = element_text(size = 20)) +
stat_compare_means(aes(group = call), label = "p.format") +
stat_compare_means(aes(group = call),comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(call = fct_relevel(call,
"C/C", "C/G", "G/G")) %>%
mutate(stmn2_psi_groups = as.numeric(cut_interval(log10(stmn_2_cryptic_psi), n = 2))) %>%
mutate(stmn2_psi_groups = ifelse(is.na(stmn2_psi_groups)," No STMN2",stmn2_psi_groups)) %>%
mutate(stmn2_psi_groups = case_when(stmn2_psi_groups == 1 ~ " Low STMN2",
stmn2_psi_groups == 2 ~ "High STMN2",
TRUE ~ stmn2_psi_groups)) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
ggbarplot( x = "call",
y = "cryptic_psi",
color = 'call',
facet.by = 'stmn2_psi_groups',
position = position_dodge(0.8),
add = c("mean_se","jitter")) +
ylab("UNC13A CE PSI") +
scale_color_manual(values = c("#88CCEE","#44AA99","#105e2a")) +
theme(text = element_text(size = 20)) +
stat_compare_means(aes(group = call), label = "p.format") +
stat_compare_means(aes(group = call),comparisons = genotype_comparisons,label = "p.signif") +
xlab("")
Across our KD’s, we observed a clear allelic bias in the NB cells
kd_vafs = fread(file.path(here::here(),"data","all_kds_unc13.snp.out.tsv"))
kd_vafs %>%
left_join(rel_rna_cryptic_amount) %>%
filter(!is.na(condition)) %>%
mutate(VAF = alt_reads / total) %>%
filter(condition != "control" ) %>%
filter(alt_reads > 5) %>%
ggplot(aes(x = stmn_2_cryptic_psi,y = cryptic_psi, fill = -(VAF - 0.5),size = total)) +
geom_point(pch = 21) +
coord_fixed() +
ylim(0,0.8) +
xlim(0,0.8) +
geom_abline() +
scale_fill_gradient2()
## Joining, by = "sample"
kd_vafs %>%
left_join(rel_rna_cryptic_amount) %>%
filter(!is.na(condition)) %>%
filter(condition != "control" ) %>%
mutate(VAF = alt_reads / total) %>%
filter(source %in% c("sh_dzap","shsy5y_normed","ipsc_normed_ward")) %>%
dplyr::select(sample,ref_reads,alt_reads,TARDBP,source,VAF) %>%
data.table::melt(id.vars = c("sample","TARDBP",'source','VAF')) %>%
mutate(sample = fct_reorder(sample,-TARDBP)) %>%
ggplot(aes(x = sample,y = value, fill = variable)) +
geom_col(pch = 21, size = 4) +
coord_flip()
## Joining, by = "sample"
Also look at the coverage in the Liu nuclear facs sorted neurons
liu_vafs = fread(file.path(here::here(),"data","liu_facs_neurons_unc13.snp.out.tsv"))
liu_stmn2 = fread(file.path(here::here(),"data","liu_stmn2_and_unc13aaggregated.clean.annotated.bed"))
liu_stmn2[,sample := gsub(".SJ.out","",V4)]
liu_stmn2 = liu_stmn2 %>%
unique() %>%
pivot_wider(id_cols = 'sample',
names_from = "V7",
values_from = 'V5',
values_fill = 0)
liu_stmn2 <- liu_stmn2 %>%
mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>%
mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2))
liu_vafs %>%
mutate(VAF = alt_reads / total) %>%
data.table::melt(id.vars = c("sample",'VAF','total')) %>%
mutate(allele = ifelse(variable == "ref_reads", "C","G")) %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
mutate(sample_name = gsub("_unsorted", "", sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
ggplot(aes(x = condition,y = value, fill = allele)) +
geom_col(pch = 21, size = 4) +
facet_wrap(~sample_name,scales = "free")
liu_stmn2 %>%
dplyr::select(stmn2_psi,sample,unc13a_psi) %>%
data.table::melt() %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
filter(!grepl("unsorted",sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
mutate(condition = ifelse(grepl("negative",condition), "TDP-43 \nNegative", "TDP-43 \nPositive")) %>%
mutate(variable = ifelse(grepl("stmn2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = condition,y = value, fill = variable)) +
geom_col(size = 4,position = 'dodge') +
facet_wrap(~sample_name,scales = "free_x") +
scale_fill_manual(values = c("#004D40","#882255")) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'top') +
theme(legend.title=element_blank())
## Using sample as id variables
liu_stmn2 %>%
mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>%
mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime)) %>%
left_join(liu_vafs) %>%
mutate(alt_vaf = (alt_reads)/total) %>%
mutate(condition = ifelse(grepl("negative",sample),"negative", "positive")) %>%
filter(!grepl("unsorted",sample)) %>%
ggplot(aes(y = unc13a_psi,x = stmn2_psi, fill = alt_vaf)) +
geom_point(pch = 21,size = 5) +
ggtitle("STMN2 PSI and the VAF of the G allele \n in the Liu Nuclei ") +
facet_grid(~condition) +
scale_fill_viridis_c(option = "plasma") +
coord_fixed() +
geom_abline()
## Joining, by = "sample"
nycg_vafs = fread(file.path(here::here(),"data","NYGC_all_samples_UNC13A_snp_coverage.tsv"),fill = T)
nycg_vafs %>%
left_join(clean_data_table) %>%
filter(call == "C/G") %>%
mutate(fraction_risk = alt_reads / total) %>%
filter(!is.na(fraction_risk)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
geom_point(size = 4, pch = 21, aes(fill = fraction_risk - 0.5)) +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
geom_abline() +
scale_fill_gradient2()
## Joining, by = "sample"
ftld_comp <- list( c("Control", "FTLD \n non-TDP"),
c("Control", "FTLD-TDP"), c("FTLD-TDP", "FTLD \n non-TDP") )
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(tissue_clean == "Frontal_Cortex") %>%
ggplot(aes(x = disease_group2, y = UNC13A_TPM)) +
geom_boxplot() +
facet_wrap(~tissue_clean,scales = "free") +
stat_compare_means(comparisons = ftld_comp)